library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.3     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ggplot2)
library(data.table)
## 
## Attaching package: 'data.table'
## 
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## 
## The following object is masked from 'package:purrr':
## 
##     transpose
library(parallel)
# library(igraph)
library(here)
## here() starts at /home/n.brouwer/MEP
library(fst)
library(assertthat)
## 
## Attaching package: 'assertthat'
## 
## The following object is masked from 'package:tibble':
## 
##     has_name
library(RColorBrewer)
# library(latex2exp)
library(cowplot)
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:cowplot':
## 
##     get_legend
library(gtools)
source(here("UtilityFunctions.R"))
source(here("MEP_UtilityFunctions.R"))
library('glmnet', quiet = T)
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loaded glmnet 4.1-6
## 
## Attaching package: 'glmnet'
## 
## The following object is masked from 'package:gtools':
## 
##     na.replace
library('ROCR', quiet = T)
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.14.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
## 
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##     genomic data. Bioinformatics 2016.
## 
## 
## The new InteractiveComplexHeatmap package can directly export static 
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(corrplot)
## corrplot 0.92 loaded
library(tidyHeatmap)
## ========================================
## tidyHeatmap version 1.8.1
## If you use tidyHeatmap in published research, please cite:
## 1) Mangiola et al. tidyHeatmap: an R package for modular heatmap production 
##   based on tidy principles. JOSS 2020.
## 2) Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional 
##   genomic data. Bioinformatics 2016.
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(tidyHeatmap))
## ========================================
## 
## 
## Attaching package: 'tidyHeatmap'
## 
## The following object is masked from 'package:stats':
## 
##     heatmap
# set seed
projectSeed <- 89230689
set.seed(projectSeed)

outDir <- here('scratch')
cells <- getCells()
clinical_data <- getClinical()

Heatmaps

# Are we going to scale the rows as well? The sums of each row are always 2 (1 for tumor cells, 1 for TME cells)

cellPhenotypes <- getCellProportionsPerImage()
density_matrix <- generate_matrix(cellPhenotypes, 'ImageNumber')

library(circlize)
## ========================================
## circlize version 0.4.15
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
## 
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
##   in R. Bioinformatics 2014.
## 
## This message can be suppressed by:
##   suppressPackageStartupMessages(library(circlize))
## ========================================
col_fun = colorRamp2(c(-4, 0, 4), c("blue", "white", "red"))

density_hm <- Heatmap(as.matrix(density_matrix %>% select(-c('isTumour'))),name='densities', column_title = 'cell type', 
                      row_title = 'sample', row_names_gp = gpar(fontsize=2), column_names_gp = gpar(fontsize=15),cluster_rows = T, cluster_columns = T, col=col_fun)

save_pdf(density_hm, here('output/Method_comparison/heatmaps/density_heatmap.pdf'), width=20, height=15)

PCA

Now perform PCA to separate the samples.

generate_labels <- function(matrix){
labels <- merge(as_tibble(rownames(density_matrix)), clinical_data, by.x='value', by.y='ImageNumber', all.x=T)
labels <- labels %>% rename(ImageNumber = value)%>%
  mutate(HER = ifelse(PAM50=='Her2', 'HER2', 'Other')) %>%
  mutate(LuminalA = ifelse(PAM50=='LumA', 'Luminal A', 'Other')) %>%
  mutate(LuminalB = ifelse(PAM50=='LumB', 'Luminal B', 'Other')) %>%
  mutate(Bas = ifelse(PAM50=='Basal', 'Basal', 'Other')) %>%
  mutate(Norm= ifelse(PAM50=='Normal', 'Normal', 'Other'))

colnames(labels) <- gsub(" ", "_", colnames(labels))
return(labels)
}


plot_pca <- function(matrix, label){
  labels = generate_labels(matrix)
  matrix_withlabels <- cbind.data.frame(matrix, rownames(matrix))
  colnames(matrix_withlabels)[ncol(matrix_withlabels)] = 'ImageNumber'
  matrix_withlabels <- merge(matrix_withlabels, labels, by='ImageNumber', all.x=T)

  res.pca <- prcomp(matrix_withlabels %>% select(-colnames(labels)), scale = FALSE)
  print(fviz_eig(res.pca))
  print(fviz_pca_biplot(res.pca, label="var", habillage=matrix_withlabels %>% pull(!!label), addEllipses = T, ellipse.level = 0.95,select.var = list(contrib = 5) ))
  
}

plot_pca(density_matrix, 'PAM50')

## Warning: Removed 113 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

ggsave(here('output/Method_comparison/pca/pca_density_PAM50.pdf'))
## Saving 7 x 5 in image
## Warning: Removed 113 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).
plot_pca(density_matrix, 'IntClust')

## Warning: Removed 113 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).

ggsave(here('output/Method_comparison/pca/pca_density_IntClust.pdf'))
## Saving 7 x 5 in image
## Warning: Removed 113 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).

Now consider tumor and TME cells separately.

TME <- getTumorAndTMETypes()$TME_types
tumor <- getTumorAndTMETypes()$tumor_types
TME_colnames = paste(TME, '_CPh',sep='')
tumor_colnames = paste(tumor, '_CPh', sep='')

tumor_densities <- as.data.frame(cellPhenotypes)[,c(tumor_colnames, 'ImageNumber')]
tumor_densities <- na.omit(tumor_densities)
tumor_density_matrix <- generate_matrix(tumor_densities,'ImageNumber')

tumor_density_hm <- Heatmap(as.matrix(tumor_density_matrix),name='densities', column_title = 'cell type', 
                      row_title = 'sample', row_names_gp = gpar(fontsize=2), column_names_gp = gpar(fontsize=15),cluster_rows = T, cluster_columns = T, col=col_fun)

save_pdf(tumor_density_hm, here('output/Method_comparison/heatmaps/tumor_density_heatmap.pdf'), width=20, height=15)

plot_pca(tumor_density_matrix, 'PAM50')

## Warning: Removed 99 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

ggsave(here('output/Method_comparison/pca/pca_tumor_density.pdf'))
## Saving 7 x 5 in image
## Warning: Removed 99 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).
plot_pca(tumor_density_matrix, 'IntClust')

## Warning: Removed 99 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).

ggsave(here('output/Method_comparison/pca/pca_tumor_density_IntClust.pdf'))
## Saving 7 x 5 in image
## Warning: Removed 99 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).
tme_densities <- as.data.frame(cellPhenotypes)[,c(TME_colnames, 'ImageNumber')]
tme_densities <- na.omit(tme_densities)
tme_density_matrix <- generate_matrix(tme_densities,'ImageNumber')

tme_density_hm <- Heatmap(as.matrix(tme_density_matrix),name='densities', column_title = 'cell type', 
                      row_title = 'sample', row_names_gp = gpar(fontsize=2), column_names_gp = gpar(fontsize=15),cluster_rows = T, cluster_columns = T, col=col_fun)

save_pdf(tme_density_hm, here('output/Method_comparison/heatmaps/tme_density_heatmap.pdf'), width=20, height=15)

plot_pca(tme_density_matrix, 'PAM50')

## Warning: Removed 113 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).

ggsave(here('output/Method_comparison/pca/pca_tme_density.pdf'))
## Saving 7 x 5 in image
## Warning: Removed 113 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).
plot_pca(tme_density_matrix, 'IntClust')

## Warning: Removed 113 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).

ggsave(here('output/Method_comparison/pca/pca_tme_density_IntClust.pdf'))
## Saving 7 x 5 in image
## Warning: Removed 113 rows containing missing values (`geom_point()`).
## Removed 1 rows containing missing values (`geom_point()`).

Take a look at the PAM50 patient groups.

subtypes <- unique(merge(density_matrix, clinical_data, by.x='row.names', by.y='ImageNumber', all.x=T) %>% pull('PAM50'))

for (s in subtypes){
  if (is.na(s)){
      samples_cellPHenotypes <- density_matrix %>% filter(rownames(density_matrix) %in% (clinical_data %>% filter(is.na(`PAM50`)) %>% pull(`ImageNumber`))) %>% select(-c('isTumour'))
  
  }else{
  samples_cellPHenotypes <- density_matrix %>% filter(rownames(density_matrix) %in% (clinical_data %>% filter(`PAM50` == s) %>% pull(`ImageNumber`))) %>% select(-c('isTumour'))
  }
  
  subtype_hm <- Heatmap(as.matrix(samples_cellPHenotypes),name=paste(s, 'samples'), column_title = 'cell type', 
                      row_title = 'sample', row_names_gp = gpar(fontsize=2), column_names_gp = gpar(fontsize=15),cluster_rows = T, cluster_columns = T, col=col_fun)
  save_pdf(subtype_hm, paste(here('output/Method_comparison/heatmaps/'),'density_heatmap_',s,'.pdf',sep=''), width=20, height=14)
  
}

PAM50 do not have clear cell type density signatures. The subtypes are based on gene signatures of tumor cells. Let’s have a look at the type of tumor cells in the subtypes

for (s in subtypes){
  samples_cellPHenotypes <- tumor_density_matrix %>% filter(rownames(tumor_density_matrix) %in% (clinical_data %>% filter(`PAM50` == s) %>% pull(`ImageNumber`)))
  
  subtype_hm <- Heatmap(as.matrix(samples_cellPHenotypes),name=paste(s, 'samples'), column_title = 'cell type', 
                      row_title = 'sample', row_names_gp = gpar(fontsize=2), column_names_gp = gpar(fontsize=13),cluster_rows = T, cluster_columns = F, col=col_fun)
  save_pdf(subtype_hm, paste(here('output/Method_comparison/heatmaps/'),'tumor_density_heatmap_',s,'.pdf',sep=''), width=20, height=14)
  
}

Basal subtypes with ER+ cells

Patients with basal subtypes contain ER+ cells. Lets look at the slides of these patients.

positive_celltypes <- c("ER^{hi}CXCL12^{+}", "CK8-18^{+} ER^{hi}")

Basal_cellPHenotypes <- tumor_densities %>% filter(rownames(tumor_density_matrix) %in% (clinical_data %>% filter(`PAM50` == 'Basal') %>% pull(`ImageNumber`)))
positive_samples <- Basal_cellPHenotypes %>% filter(get("CK8-18^{+} ER^{hi}_CPh") > 0 | get("ER^{hi}CXCL12^{+}_CPh") > 0)  %>% pull('ImageNumber')

# count_features <- getDensityFeatures()
# counts_positive_samples <- count_features %>% filter(ImageNumber %in% (positive_samples %>% pull('ImageNumber')))

for (j in 1:10){
  i = positive_samples[j]
  s <- show_slide(i,positive_celltypes)
  print(s)
  ggsave(paste('output/Slides/basal_slide',i,'.pdf',sep=''), width = 15, height=10)
}
## Warning in as_grob.default(plot): Cannot convert object of class numeric into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class numeric into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class numeric into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class numeric into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class numeric into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class numeric into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class numeric into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class numeric into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class numeric into a
## grob.

## Warning in as_grob.default(plot): Cannot convert object of class numeric into a
## grob.

In some samples the ER+ cell count is very low.

basal_counts <- cells %>% dplyr::count(ImageNumber, meta_description) %>% mutate(type = ifelse((ImageNumber %in% positive_samples),'Basal with ER+ cells','Other types with ER+ cells'))

basal_counts <- merge(basal_counts, clinical_data %>% select(c("ImageNumber", "PAM50")), by='ImageNumber')

ggplot(data=basal_counts %>% filter(meta_description %in% positive_celltypes)) + theme_bw() + geom_boxplot(aes(x=PAM50,y=n,color=meta_description)) + ylim(0,50) + ggtitle('ER+ cell count in samples marked as basal and other types ')
## Warning: Removed 228 rows containing non-finite values (`stat_boxplot()`).

ggsave(here('output/Density_results/ERcellCount_Basalsubtype.pdf'))
## Saving 7 x 5 in image
## Warning: Removed 228 rows containing non-finite values (`stat_boxplot()`).

Look at the expression of these cells

positive_basal_cells <- cells %>% filter(meta_description %in% positive_celltypes) %>% mutate(type = ifelse((ImageNumber %in% positive_samples),'Basal with ER+ cells','Other types with ER+ cells'))
ggplot(data=positive_basal_cells) + theme_bw() + xlab('ER expression') + geom_boxplot(aes(x=type, y=ER)) + ylim(0,200) + ggtitle('Meassured ER expression of ER+ cells in samples marked as Basal and other type') + xlab('sample type') + ylab('ER expression')
## Warning: Removed 1672 rows containing non-finite values (`stat_boxplot()`).

ggsave(here('output/Density_results/ERexpression_Basalsubtype.pdf'))
## Saving 7 x 5 in image
## Warning: Removed 1672 rows containing non-finite values (`stat_boxplot()`).

The expression of these cells is not lower as other ER+ cells and are therefore correctly marked as ER+ cells.

subtypes <- setdiff(unique(clinical_data %>% pull(PAM50)), NA)
cell_count_matrix <- getDensityFeatures() %>% select(-c('isTumour'))
cell_count_matrix_withlabels <- merge(cell_count_matrix, clinical_data %>% select(c(PAM50, ImageNumber)), by='ImageNumber', all.x=T)

zero_count_result <- tibble(features = colnames(cell_count_matrix)[2:33])
for (s in 1:length(subtypes)){
  subset <- t(cell_count_matrix_withlabels %>% filter(`PAM50` == subtypes[[s]]))
  subset <- subset[2:33,]
  subset_m <- matrix(as.numeric(subset),    # Convert to numeric matrix
                    ncol = ncol(subset))
  zero_count <- rowSums(subset_m != 0)  / ncol(subset_m)
  zero_count_result <- cbind(zero_count_result, zero_count)
  
}

colnames(zero_count_result) <- c('feature', subtypes)
rownames(zero_count_result) <- zero_count_result$feature
zero_count_result <- zero_count_result[,-1]

h <- Heatmap(zero_count_result,name='not zero percentage', cluster_columns = F, cluster_rows = T)
## Warning: The input is a data frame-like object, convert it to a matrix.
save_pdf(h, here('output/Density_results/zero_counts_per_subtype.pdf'),width=10, height=10)

Densities in PAM50 subtypes

library(matrixStats)
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
count_result <- tibble(features = colnames(cell_count_matrix)[2:33])
for (s in 1:length(subtypes)){
  subset <- t(cell_count_matrix_withlabels %>% filter(`PAM50` == subtypes[[s]]))
  subset <- subset[2:33,]
  subset_m <- matrix(as.numeric(subset),    # Convert to numeric matrix
                    ncol = ncol(subset))
  means <- rowMeans(subset_m)
  count_result <- cbind(count_result, means)
  
}

colnames(count_result) <- c('feature', subtypes)
rownames(count_result) <- count_result$feature
count_result <- count_result[,-1]

h <- Heatmap(count_result,name='mean count', cluster_columns = F, cluster_rows = F)
## Warning: The input is a data frame-like object, convert it to a matrix.
save_pdf(h, here('output/Density_results/mean_count_per_subtype.pdf'),width=10, height=10)

count_result <- tibble(features = colnames(cell_count_matrix)[2:33])
for (s in 1:length(subtypes)){
  subset <- t(cell_count_matrix_withlabels %>% filter(`PAM50` == subtypes[[s]]))
  subset <- subset[2:33,]
  subset_m <- matrix(as.numeric(subset),    # Convert to numeric matrix
                    ncol = ncol(subset))
  means <- rowMedians(subset_m)
  count_result <- cbind(count_result, means)
  
}

colnames(count_result) <- c('feature', subtypes)
rownames(count_result) <- count_result$feature
count_result <- count_result[,-1]

h <- Heatmap(count_result,name='median count', cluster_columns = F, cluster_rows = F)
## Warning: The input is a data frame-like object, convert it to a matrix.
save_pdf(h, here('output/Density_results/median_count_per_subtype.pdf'),width=10, height=10)
library(matrixStats)

cellPhenotypes_withlabels <- merge(x=cellPhenotypes, y= clinical_data %>% select(c('ImageNumber', 'PAM50')), by='ImageNumber', all.x=T)
cellPhenotypes_withlabels <- na.omit(cellPhenotypes_withlabels)

count_result <- tibble(features = colnames(cellPhenotypes_withlabels)[2:33])
for (s in 1:length(subtypes)){
  subset <- cellPhenotypes_withlabels %>% filter(`PAM50` == subtypes[[s]])
  means <- colMeans(subset[,2:33])
  count_result <- cbind(count_result, means)
  
}

colnames(count_result) <- c('feature', subtypes)
rownames(count_result) <- count_result$feature
count_result <- count_result[,-1]

h <- Heatmap(count_result,name='mean proportion', cluster_columns = F, cluster_rows = T)
## Warning: The input is a data frame-like object, convert it to a matrix.
save_pdf(h, here('output/Density_results/mean_proportion_per_subtype.pdf'),width=10, height=10)

count_result <- tibble(features = colnames(cellPhenotypes_withlabels)[2:33])
for (s in 1:length(subtypes)){
  subset <- cellPhenotypes_withlabels %>% filter(`PAM50` == subtypes[[s]])
  means <- apply(subset[,2:33],2,median)
  count_result <- cbind(count_result, means)
  
}

colnames(count_result) <- c('feature', subtypes)
rownames(count_result) <- count_result$feature
count_result <- count_result[,-1]

h <- Heatmap(count_result,name='median proportion', cluster_columns = F, cluster_rows = T)
## Warning: The input is a data frame-like object, convert it to a matrix.
save_pdf(h, here('output/Density_results/median_proportion_per_subtype.pdf'),width=10, height=10)